In this section, we will be using HYSPLIT to find out where the air reaching Jakarta is coming from. We’ll combine this with PM 2.5 levels to understand where potential transboundary sources of air pollution lie.
Note that this document is more of an introduction than an actual scientific analysis, as many potential factors and sensitivy analyses won’t be included.
Beside more traditional packages, we weill be using three dedicated packages on air pollution, namely: - RCREA: CREA’s own package to retrieve air pollution levels - Splitr: a convenient wrapper for HYSPLIT calculation - OpenAIR: a very powerful package to analyse air trajectories
if(!require(rcrea)){devtools::install_github("energyandcleanair/rcrea"); require(rcrea)}
if(!require(splitr)){devtools::install_github("rich-iannone/splitr"); require(splitr)}
if(!require(openair)){devtools::install_github('davidcarslaw/openair'); library(openair)}
Other packages used for this tutorial are:
library(lubridate)
library(pbmcapply)
library(dplyr)
library(ggmap)
library(ggplot2)
Let’s take Jakarta and 1 September 2020 as an example.
HYSPLIT/Splitr needs some parameters: - weather dataset used to calculate trajectories - height of the ‘receptor’ - duration of the trajectories
met_type <- "reanalysis" # or gdas1
height <- 10 #meter
duration <- 72 #hours
We also specify the folder where weather data will be cached (can take few GBs), and results temporarily stored.
dir_hysplit_met <- "/Volumes/ext1/data/weather/hysplit_met/" # Folder where weather files will be downloaded/cached
dir_hysplit_output <- here::here("hysplit_output") # Folders where HYSPLIT results will be temporarily stored
Now we can run the backward trajectory calculations!
trajs_at_date <- function(date, met_type){
splitr::hysplit_trajectory(
lon = lon,
lat = lat,
height = height,
duration = duration,
days = lubridate::date(date),
daily_hours = c(0, 6, 12, 18), # The weather dataset is available every 6-hours
direction = "backward",
met_type = met_type,
extended_met = F,
met_dir = dir_hysplit_met,
exec_dir = dir_hysplit_output,
clean_up = F
)
}
trajs <- trajs_at_date(date, met_type)
The trajs dataframe contains the trajectories. Let’s plot them.
plot_trajs <- function(trajs){
# Get the boundary box: 50km around trajectories
buffer_km <- 50
bbox <- sf::st_as_sf(trajs, coords=c("lon","lat"), crs=4326) %>%
sf::st_transform(crs=3857) %>% # Reproject in pseudo-mercator to have meter unit
sf::st_buffer(buffer_km*1E3) %>%
sf::st_transform(crs=4326) %>%
sf::st_bbox() %>%
unname()
basemap <- ggmap::get_stamenmap(bbox = bbox,
zoom = 8)
ggmap(basemap) +
coord_cartesian() +
# trajectory lines
geom_path(data = trajs %>%
dplyr::arrange(hour_along) %>%
mutate(subcluster=paste(traj_dt_i, hour_along %/% 8)),
arrow = arrow(angle=18, length=unit(0.1,"inches")),
aes(x = lon, y = lat, group=subcluster), color="darkred", alpha=0.6) +
geom_path(data = trajs,
aes(x = lon, y = lat, group=traj_dt_i), color="darkred", alpha=0.6) +
# theme
theme(panel.background = element_rect(fill='lightgray'),
panel.border = element_rect(color='black', fill=NA),
panel.grid = element_line(color=NA),
plot.caption = element_text(lineheight = 0.9),
legend.position="right",
legend.key = element_rect(fill='white')) +
labs(title=paste("Sources or air flowing into Jakarta"),
subtitle = unique(lubridate::date(trajs$date)),
x='', y='',
caption=paste0("Source: CREA based on HYSPLIT. ",
"Weather dataset: ", met_type, " ",
"Receptor height: ", height, "m. ",
"Duration: ", duration, " hour."))
}
plot_trajs(trajs)
Using another weather dataset:
trajs_at_date(date, met_type="gdas1") %>%
plot_trajs()
We can repeat this at other dates.
trajs_at_date("2019-01-01", met_type) %>%
plot_trajs()
trajs_at_date("2019-01-01", met_type="gdas1") %>%
plot_trajs()
trajs_at_date("2019-11-01", met_type) %>%
plot_trajs()
trajs_at_date("2019-11-01", met_type="gdas1") %>%
plot_trajs()
The receptor height is another important factor, for which sensitivity analyses should be carried.
We might be interested in trajectories over a longer period of time. Here we’ll consider the whole of 2019.
date_from <- "2019-01-01"
date_to <- "2019-12-31"
dates <- seq(lubridate::date(date_from), lubridate::date(date_to), by="d")
dates_split <- split(dates, ceiling(seq_along(dates)/5)) # Compute 5 days at time per core
trajs_at_dates <- function(dates){
tryCatch({
hysplit_trajectory(
lon = lon,
lat = lat,
height = height,
duration = duration,
days = dates,
daily_hours = c(0, 6, 12, 18),
direction = "backward",
met_type = met_type,
extended_met = F,
met_dir = dir_hysplit_met,
exec_dir = dir_hysplit_output,
clean_up = F
)
},
error=function(c){
return(NA)
})
}
trajs.2019 <- do.call('rbind',
pbmclapply(dates_split, trajs_at_dates,
mc.cores = parallel::detectCores()-1))
We first need to make trajectories compatible with openair (e.g. rename or add certain fields):
format_for_openair <- function(trajs){
# Update fields to be compatible with OpenAIR
trajs$hour.inc <- trajs$hour_along
trajs$date <- trajs$traj_dt_i
trajs$date2 <- trajs$traj_dt
trajs$year <- lubridate::year(trajs$traj_dt_i)
trajs$month <- lubridate::month(trajs$traj_dt_i)
trajs$day <- lubridate::date(trajs$traj_dt_i)
return(trajs)
}
trajs.2019 <- format_for_openair(trajs.2019)
Let’s also add PM2.5 values, using CREA R package.
pm25.2019 <- rcrea::measurements(city="jakarta", source="openaq", date_from=date_from, date_to=date_to, poll=rcrea::PM25) %>%
mutate(date=lubridate::date(date))
trajs.poll.2019 <- trajs.2019 %>% left_join(pm25.2019, by=c("day"="date"))
plt.months <- trajPlot(
trajs.poll.2019,
pollutant = "value",
type = "month",
map = TRUE,
group = NA,
map.fill = TRUE,
map.res = "default",
map.cols = "grey40",
map.alpha = 0.4,
projection = "mercator",
parameters = NULL,
orientation = c(90, 0, 0),
grid.col = "transparent",
npoints = 12,
origin = TRUE
)
Zooming in on a month:
plot_trajs_month <- function(trajs, month){
plt.month <- trajPlot(
trajs %>% filter(lubridate::month(date)==!!month),
pollutant = "value",
type = "default",
map = TRUE,
group = NA,
map.fill = TRUE,
map.res = "default",
map.cols = "grey40",
map.alpha = 0.4,
projection = "mercator",
parameters = NULL,
orientation = c(90, 0, 0),
grid.col = "transparent",
npoints = 12,
origin = TRUE
)
}
plot_trajs_month(trajs.poll.2019, 8)
plot_trajs_month(trajs.poll.2019, 1)
Let’s overlay these trajectories with industries.
We’re using industry locations from PROPER programme.
proper <- read.csv(file.path('../data/proper_industries.csv')) %>%
mutate(Sector=recode(Sector,
`Petroleum, Chemicals & Plastics`="Petrochemicals & Plastics")) %>%
filter(Sector %in% c("Metal & non-metallic minerals",
"Petrochemicals & Plastics",
"Oil & Gas refining",
"Power & Energy",
"Cement, Steel & heavy industry",
"Glass"
))
plot_trajs_industries <- function(trajs, industries, subtitle){
# Get the boundary box: 50km around trajectories
buffer_km <- 100
bbox <- sf::st_as_sf(trajs, coords=c("lon","lat"), crs=4326) %>%
sf::st_transform(crs=3857) %>% # Reproject in pseudo-mercator to have meter unit
sf::st_buffer(buffer_km*1E3) %>%
sf::st_transform(crs=4326) %>%
sf::st_bbox() %>%
unname()
basemap <- ggmap::get_stamenmap(bbox = bbox,
zoom = 8,
where="/Volumes/ext1/data/basemap")
ggmap(basemap) +
coord_cartesian() +
# trajectory lines
geom_path(data = trajs %>%
dplyr::arrange(hour_along) %>%
mutate(subcluster=paste(traj_dt_i, hour_along %/% 8)),
arrow = arrow(angle=18, length=unit(0.1,"inches")),
aes(x = lon, y = lat, group=subcluster, color=value),
alpha=0.6) +
geom_path(data = trajs %>% filter(value<70),
aes(x = lon, y = lat, group=traj_dt_i, color=value), alpha=0.6) +
geom_path(data = trajs %>% filter(value>=70),
aes(x = lon, y = lat, group=traj_dt_i, color=value), alpha=0.6) +
# Industry
geom_point(data=industries, inherit.aes = F, aes(x=longitude,y=latitude, shape=Sector, fill=Sector),
alpha=0.8, position="jitter") +
# theme
scale_shape_manual(name="Sector", values=c(15,16,17,18,19,20)) +
scale_fill_brewer(name="Sector", palette="Dark2") +
scale_color_gradientn(colours = c("cyan","yellow","red")) +
theme(panel.background = element_rect(fill='lightgray'),
panel.border = element_rect(color='black', fill=NA),
panel.grid = element_line(color=NA),
plot.caption = element_text(lineheight = 0.9),
legend.position="right",
legend.key = element_rect(fill='white')) +
labs(title=paste("Sources or air flowing into Jakarta"),
subtitle = subtitle,
x='', y='',
caption=paste0("Source: CREA based on PROPER and HYSPLIT. ",
"Weather dataset: ", met_type, ". ",
"Receptor height: ", height, "m. ",
"Duration: ", duration, " hour."))
}
selected_month <- 8
plot_trajs_industries(
trajs=trajs.poll.2019 %>% filter(lubridate::month(date)==selected_month),
industries=proper,
subtitle=paste("Month:", selected_month))
Zooming in:
plot_trajs_industries(
trajs=trajs.poll.2019 %>% filter(lubridate::month(date)==selected_month),
industries=proper,
subtitle=paste("Month:", selected_month)) +
coord_cartesian(xlim=c(106.5,108), ylim=c(-8,-6))
Another month:
plot_trajs_industries(
trajs=trajs.poll.2019 %>% filter(lubridate::month(date)==1),
industries=proper,
subtitle=paste("Month:", 1))
OpenAIR package offers many interesting plotting and analysis fonctionalities: read here
For instance, we can identify the most representative air trajectories:
plot_traj_clusters <- function(trajs, n_clusters=4, statistics="frequency"){
openair::trajCluster(
trajs.2019,
method = "Euclid",
n.cluster = n_clusters,
plot = T,
type = "default",
cols = "Set1",
split.after = FALSE,
map.fill = TRUE,
map.cols = "grey40",
map.alpha = 0.4,
projection = "mercator",
parameters = NULL,
orientation = c(90, 0, 0),
by.type = FALSE,
origin = TRUE
)
}
(plt.trajs.2019 <- plot_traj_clusters(trajs.2019))